%load_ext autoreload
%autoreload 2
import datetime as dt
import math
import sys

import numpy as np
import pandas as pd
import statsmodels.api as sm
from numpy.random import random
from scipy.signal import periodogram
from statsmodels.tsa.stattools import acf, pacf

# The parent directory is added to the search path before importing pytsviz.
sys.path.append("..")
from pytsviz.viz import *
# Dimensions of the synthetic example: number of random feature columns and
# number of daily samples generated further down.
n_features = 3
dataset_size = 366
Target series
# Daily timestamps, one per sample, starting on 2020-01-01.
t_axis = pd.date_range(start="1/1/2020", periods=dataset_size, freq="D", name="t")

# The target is the sum of three noisy components. They are built in the same
# order as the draws from `random`, so the random stream is consumed
# identically to a single-expression formulation.
trend = (1 + random(dataset_size)) * np.linspace(6, 9, dataset_size)
fast_wave = (1 + random(dataset_size)) * np.sin(np.linspace(0, 10 * np.pi, dataset_size))
slow_wave = (1.5 + random(dataset_size)) * np.cos(np.linspace(0, 5.4 * np.pi, dataset_size))
y = trend + fast_wave + slow_wave

# Series and single-column frame views of the same data, both named "y".
ts = pd.Series(data=y, index=t_axis, name="y")
ts_df = ts.to_frame()
Features
# One row of random values per feature; each row becomes a column x_<row index>
# of a frame that shares the target's date index.
feat_matrix = random((n_features, dataset_size))
feat_columns = {f"x_{idx}": row for idx, row in enumerate(feat_matrix)}
feat_df = pd.DataFrame(data=feat_columns, index=t_axis)
Forecast
# Forecast dates: as many daily steps as the target, but starting on
# 2020-02-01, so the forecast overlaps the observations and then runs past them.
ext_t_axis = pd.date_range(start="2/1/2020", periods=dataset_size, freq="D", name="t")

# Synthetic forecast: the target inflated by independent per-sample noise of
# up to 10%. A bare `random()` draws a single scalar, which would only rescale
# `y` by one constant instead of perturbing each point.
y_hat = y * (1 + 0.1 * random(y.shape))
forecast = pd.Series(index=ext_t_axis, data=y_hat, name="y_hat")
forecast_df = forecast.to_frame()

# Confidence band whose half-width grows with the step index i as
# (0.001 * i) ** 1.2, computed in one vectorized pass.
half_width = (0.001 * np.arange(len(y_hat))) ** 1.2
c_lower = y_hat - half_width
c_upper = y_hat + half_width
confidence_df = pd.DataFrame(
    index=ext_t_axis,
    data={"lower confidence": c_lower, "upper confidence": c_upper},
)

# Column-wise concat aligns on the date index; dates covered by only some of
# the inputs get NaN in the remaining columns.
total_df = pd.concat([ts, feat_df, forecast_df, confidence_df], axis=1)
total_df
| | y | x_0 | x_1 | x_2 | y_hat | lower confidence | upper confidence |
|---|---|---|---|---|---|---|---|
| t | | | | | | | |
| 2020-01-01 | 8.079131 | 0.242302 | 0.557478 | 0.601941 | NaN | NaN | NaN |
| 2020-01-02 | 10.650662 | 0.929234 | 0.557489 | 0.376543 | NaN | NaN | NaN |
| 2020-01-03 | 12.083600 | 0.809896 | 0.709311 | 0.864642 | NaN | NaN | NaN |
| 2020-01-04 | 10.490799 | 0.226948 | 0.019853 | 0.784072 | NaN | NaN | NaN |
| 2020-01-05 | 9.314644 | 0.217527 | 0.305872 | 0.584400 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-01-27 | NaN | NaN | NaN | NaN | 11.590459 | 11.296011 | 11.884907 |
| 2021-01-28 | NaN | NaN | NaN | NaN | 10.132183 | 9.836756 | 10.427610 |
| 2021-01-29 | NaN | NaN | NaN | NaN | 14.192122 | 13.895716 | 14.488529 |
| 2021-01-30 | NaN | NaN | NaN | NaN | 17.808586 | 17.511200 | 18.105973 |
| 2021-01-31 | NaN | NaN | NaN | NaN | 13.055010 | 12.756642 | 13.353377 |
397 rows × 7 columns
# Plot y and x_0, requesting a "log" transform with np.e as its argument while
# keeping the original series (how this is rendered is decided by pytsviz).
fig = plot_ts(
    total_df,
    y_cols=["y", "x_0"],
    keep_original=True,
    tf="log",
    tf_args=(np.e,),
)


def func1(x):
    """Return the ``week`` field of ``x.isocalendar()``."""
    return x.isocalendar().week


def func2(x):
    """Return the ``day`` field of ``x.isocalendar()``."""
    return x.isocalendar().day


# Seasonal view with period "month" and an STL decomposition, both as subplots.
plot_seasonal_ts(ts_df, subplots=True, period="month")
plot_decomposed_ts(ts_df, subplots=True, method="STL")
# Same index and columns as total_df, with the rows of values in reverse order.
# Only the disabled call below uses this frame.
inverted_df = pd.DataFrame(
    data=total_df.values[::-1],
    index=total_df.index,
    columns=total_df.columns,
)
#forecast_plot(inverted_df, ts_col = "y", fc_cols = ["y_hat"], lower_col = "lower confidence", upper_col = "upper confidence")

# A second forecast at 90% of the first, appended as an extra column.
y_hat_2 = pd.Series(y_hat * 0.9, index=ext_t_axis, name="y_hat_2")
multiple_fc_df = pd.concat([total_df, y_hat_2], axis=1)

# NOTE(review): only "y_hat" is passed in fc_cols, so "y_hat_2" is not plotted
# here -- confirm whether it was meant to be included.
plot_forecast(
    multiple_fc_df,
    y_col="y",
    fc_cols=["y_hat"],
    lower_col="lower confidence",
    upper_col="upper confidence",
)
# Scatter matrix of x_0 against y, passing lags 5, 10 and 15 for the first variable.
plot_scatter_matrix(
    total_df,
    var1="x_0",
    var2="y",
    lags1=[5, 10, 15],
)

# Scatter of x_0 vs x_1 with fit="summary"; the notebook output that follows
# is an OLS regression summary.
plot_scatter_fit(total_df, "x_0", "x_1", fit="summary")
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.000
Model: OLS Adj. R-squared: -0.002
Method: Least Squares F-statistic: 0.1396
Date: Tue, 09 Mar 2021 Prob (F-statistic): 0.709
Time: 11:29:20 Log-Likelihood: -49.025
No. Observations: 366 AIC: 102.0
Df Residuals: 364 BIC: 109.9
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.4909 0.029 16.660 0.000 0.433 0.549
x1 -0.0190 0.051 -0.374 0.709 -0.119 0.081
==============================================================================
Omnibus: 119.146 Durbin-Watson: 1.808
Prob(Omnibus): 0.000 Jarque-Bera (JB): 18.815
Skew: 0.063 Prob(JB): 8.21e-05
Kurtosis: 1.896 Cond. No. 4.46
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Autocorrelation of the target, then the partial variant with alpha = 0.1;
# both ask for the threshold to be shown.
plot_acf(ts_df, show_threshold=True)
plot_acf(ts_df, show_threshold=True, alpha=0.1, partial=True)

# Power spectral density with "spectrum" scaling.
plot_psd(ts_df, scaling="spectrum")

# Distribution plot over all columns of the combined frame, 100 bins.
plot_distribution(total_df, title="Distribution", bins=100)
# Coefficients of the example process: two AR terms and two MA terms.
arparams = np.array([0.75, -0.25])
maparams = np.array([0.65, 0.35])

# Lag-polynomial form: prepend the zero-lag coefficient 1; the AR
# coefficients are negated, the MA coefficients are used as given.
ar = np.concatenate(([1], -arparams))
ma = np.concatenate(([1], maparams))
# Build a statsmodels ARMA process from the two lag polynomials and hand it to
# the inverse-roots plot.
arma_process = sm.tsa.ArmaProcess(ar=ar, ma=ma)
plot_inverse_arma_roots(arma_process)
# The extended scatter matrix gets only the rows with no NaN in any column.
complete_rows = total_df.dropna()
plot_extended_scatter_matrix(complete_rows, y_cols=["y", "x_1", "x_2"])

# Composite overview and analysis figures for the target series.
plot_ts_overview(ts_df)
plot_ts_analysis(ts_df)

# Goodness-of-fit plot for the "y" and "y_hat" columns of the combined frame.
plot_gof(total_df, "y", "y_hat")